# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
### Reducing Prejudice and Support for Religious 
### Nationalism Through Conversations on WhatsApp 

### Author: Rajeshwari Majumdar
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# File:     supp_balance.R
# Purpose:  perform balance tests (Appendix D.2); 
#           produce Tables D.1, D.2, D.3 
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Load data 
rm(list=ls())
load("data/Completes_Merged_Anonymized.RData")


# Create functions for balance tests ----

## Partner treatment ---- 
# Balance test basic function for one variable (partner treatment)
balance_var_partner = function(x, treat) {
  x1 = x[treat == "HH"]
  x0 = x[treat == "HM"]
  
  if (length(na.omit(x1)) < 2 || length(na.omit(x0)) < 2) {
    return(c(mean_HH = NA, mean_HM = NA, diff = NA, pval = NA))
  }
  
  tt = t.test(x1, x0)
  
  c(
    n_HH = sum(!is.na(x1)),
    n_HM = sum(!is.na(x0)),
    mean_HH = unname(tt$estimate[1]),
    mean_HM = unname(tt$estimate[2]),
    diff = abs(mean(x1, na.rm = TRUE) - mean(x0, na.rm = TRUE)),
    pval    = tt$p.value
  )
}

# Balance test main function (partner treatment)
fn_balance_partner = function(data, vars, treat_var = "condition_pair") {
  
  treat = data[[treat_var]]
  
  out = lapply(vars, function(v) {
    stats = balance_var_partner(data[[v]], treat)
    data.frame(
      var     = v,
      n_HH    = stats["n_HH"],
      n_HM    = stats["n_HM"],
      mean_HH = stats["mean_HH"],
      mean_HM = stats["mean_HM"],
      diff    = stats["diff"],
      pval    = stats["pval"]
    )
  })
  
  out = do.call(rbind, out)
  
  # formatting
  out[, -1] = round(out[, -1], 2)
  out$signif = cut(out$pval, breaks = c(-Inf, 0.01, 0.05, 0.1, Inf), labels = c("***", "**", "*", ""), right = FALSE)  
  rownames(out) = NULL
  out
} 

## Topic treatment ---- 
# Balance test basic function for one variable (topic treatment)
balance_var_topic = function(x, group) {
  x_non = x[group == "non"]
  x_gen = x[group == "gen"]
  x_igr = x[group == "igr"]
  
  n_non = sum(!is.na(x_non))
  n_gen = sum(!is.na(x_gen))
  n_igr = sum(!is.na(x_igr))
  
  mean_non = if(n_non > 0) mean(x_non, na.rm = TRUE) else NA
  mean_gen = if(n_gen > 0) mean(x_gen, na.rm = TRUE) else NA
  mean_igr = if(n_igr > 0) mean(x_igr, na.rm = TRUE) else NA
  
  diff_non_gen = if(!is.na(mean_non) & !is.na(mean_gen)) abs(mean_non - mean_gen) else NA
  diff_non_igr = if(!is.na(mean_non) & !is.na(mean_igr)) abs(mean_non - mean_igr) else NA
  diff_gen_igr = if(!is.na(mean_gen) & !is.na(mean_igr)) abs(mean_gen - mean_igr) else NA
  
  # Overall ANOVA p-value
  all_vals = x[!is.na(x) & group %in% c("non","gen","igr")]
  all_groups = factor(group[!is.na(x) & group %in% c("non","gen","igr")])
  
  if(length(unique(all_groups)) > 1 && all(sapply(split(all_vals, all_groups), length) > 1)) {
    aov_res = aov(all_vals ~ all_groups)
    pval_overall = summary(aov_res)[[1]][["Pr(>F)"]][1]
  } else {
    pval_overall = NA
  }
  
  # Pairwise t-test p-values
  pval_non_gen = if(length(na.omit(x_non))>1 && length(na.omit(x_gen))>1) t.test(x_non, x_gen)$p.value else NA
  pval_non_igr = if(length(na.omit(x_non))>1 && length(na.omit(x_igr))>1) t.test(x_non, x_igr)$p.value else NA
  pval_gen_igr = if(length(na.omit(x_gen))>1 && length(na.omit(x_igr))>1) t.test(x_gen, x_igr)$p.value else NA
  
  c(
    n_non = n_non,
    n_gen = n_gen,
    n_igr = n_igr,
    mean_non = mean_non,
    mean_gen = mean_gen,
    mean_igr = mean_igr,
    diff_non_gen = diff_non_gen,
    diff_non_igr = diff_non_igr,
    diff_gen_igr = diff_gen_igr,
    pval_overall = pval_overall,
    pval_non_gen = pval_non_gen,
    pval_non_igr = pval_non_igr,
    pval_gen_igr = pval_gen_igr
  )
}

# Balance test main function (topic treatment)
fn_balance_topic = function(data, vars, treat_var = "condition_topic") {
  
  group = data[[treat_var]]
  
  out = lapply(vars, function(v) {
    stats = balance_var_topic(data[[v]], group)
    data.frame(
      var = v,
      
      # Ns
      n_non = stats["n_non"],
      n_gen = stats["n_gen"],
      n_igr = stats["n_igr"],
      
      # Means
      mean_non = stats["mean_non"],
      mean_gen = stats["mean_gen"],
      mean_igr = stats["mean_igr"],
      
      # Differences
      diff_non_gen = stats["diff_non_gen"],
      diff_non_igr = stats["diff_non_igr"],
      diff_gen_igr = stats["diff_gen_igr"],
      
      # p-values
      pval_overall = stats["pval_overall"],
      pval_non_gen = stats["pval_non_gen"],
      pval_non_igr = stats["pval_non_igr"],
      pval_gen_igr = stats["pval_gen_igr"],
      
      stringsAsFactors = FALSE
    )
  })
  
  out = do.call(rbind, out)
  
  # Round numeric columns
  num_cols = setdiff(names(out), "var")
  out[num_cols] = round(out[num_cols], 2)
  
  # Significance stars (overall and pairwise)
  out$signif_overall = cut(out$pval_overall, breaks = c(-Inf,0.01,0.05,0.1,Inf), labels=c("***","**","*",""), right=FALSE)
  out$signif_non_gen = cut(out$pval_non_gen, breaks = c(-Inf,0.01,0.05,0.1,Inf), labels=c("***","**","*",""), right=FALSE)
  out$signif_non_igr = cut(out$pval_non_igr, breaks = c(-Inf,0.01,0.05,0.1,Inf), labels=c("***","**","*",""), right=FALSE)
  out$signif_gen_igr = cut(out$pval_gen_igr, breaks = c(-Inf,0.01,0.05,0.1,Inf), labels=c("***","**","*",""), right=FALSE)
  
  rownames(out) = NULL
  out
}


# Select variables to test ----

d$hasfriend.hmoutgroup = ifelse(d$religion=="Hindu", d$hasfriend_muslim, d$hasfriend_hindu)
d$feelth.hmoutgroup = ifelse(d$religion=="Hindu", d$feelth_muslim, d$feelth_hindu)

balance.vars = c(
  # Individual demographics  
  "age","gender_f","college","castefw","married",
  # Individual preferences, attitudes, experiences
  "socialapps_WhatsApp","socialmedia_score",
  "religiosity_score", "supportBJP",
  "avg.ingr.outgr","feelth.hmoutgroup",
  "hasfriend.hmoutgroup","underwent.religious",
  "ECscore","PTscore",
  # Opinions about discussion issues
  ## Day 2: misinfo
  "cncn_misinfo","hndl_misinfo",
  ## Day 3: policing/corruption
  "cncn_policecorr","hndl_policecorr","feelth_police",
  ## Day 4: foreign policy
  "cncn_indochi","hndl_indochi",
  "cncn_indopak","hndl_indopak",
  ## Day 5: domestic 
  "cncn_econ", "hndl_econ", 
  "cncn_democ","hndl_democ"
)


# Produce tables ----
## Table D.1: Variables used in balance checks ----
d %>% select(all_of(balance.vars)) %>% summary()

## Table D.2: Balance checks for Hindus across partner religion conditions ----
fn_balance_partner(data = d %>% filter(religion == "Hindu"), vars = balance.vars)

## Table D.3: Balance checks for Muslims across conversation topic conditions ----
fn_balance_topic(data = d %>% filter(religion == "Muslim"), vars = balance.vars, treat_var = "condition_topic")
